# msdn_crawler.rb

require 'open-uri'
require 'hpricot'
require 'yaml'

# Common leading part of every glossary page URL; the crawl appends a letter
# and the '.mspx' extension to it.
$URL_PREFIX = 'http://www.microsoft.com/china/msdn/vstudio/glossary/Glossary'
# Accumulator filled by parse_terms and dumped to YAML at the end of the script.
$terms = {}

# Reads the whole resource at +url+ and yields its content to the block.
#
# The handle is opened in block form so it is closed as soon as the content
# has been read, instead of being left for the garbage collector.
#
# url - a String accepted by open-uri (an http/https/ftp URL) or a local
#       file path.
#
# Yields the resource content as a String.
# Returns whatever the block returns.
def grab(url)
  # Ruby 3.0 removed URL support from Kernel#open; open-uri now exposes it as
  # URI.open. Older Rubies do not have a public URI.open, so fall back to
  # Kernel.open there.
  opener = defined?(URI) && URI.respond_to?(:open) ? URI : Kernel
  content = opener.open(url, 'r') { |io| io.read }
  yield content
end

# Extracts term pairs from one glossary page and records them in $terms.
#
# Every element matching the CSS selector "p.lastInCell" contributes its inner
# HTML. The matches are consumed two at a time: the second cell of each pair
# becomes the key and the first cell becomes the value.
#
# content - the HTML source of the page, as a String.
#
# A trailing cell without a partner (odd number of matches) is ignored rather
# than being stored under a nil key.
def parse_terms(content)
  doc = Hpricot(content)
  cells = (doc / "p.lastInCell").map { |cell| cell.inner_html }

  cells.each_slice(2) do |value, key|
    next if key.nil? # incomplete last pair

    $terms[key] = value
  end
end


# Visit one glossary page per letter 'A' through 'Z' and collect its terms.
'A'.upto('Z') do |letter|
  page_url = "#{$URL_PREFIX}#{letter}.mspx"
  grab(page_url) { |content| parse_terms(content) }
end

# Dump everything collected in $terms to dev_dict1.yml as YAML, overwriting
# any existing file of that name.
File.open('dev_dict1.yml', 'w') { |out| out.print($terms.to_yaml) }



